Slip 23

Q.1 Fit simple linear regression and polynomial regression models to the Salary_positions.csv data. Determine which model fits the given data more accurately. Also predict the salaries of level 11 and level 12 employees.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

# Step 1: Read the salary table and separate the predictor from the response
dataset = pd.read_csv('Salary_positions.csv')
X = dataset.loc[:, ['Level']]   # position level, kept 2-D (DataFrame) for scikit-learn
y = dataset.loc[:, 'Salary']    # salary to be predicted

# Step 2: Simple Linear Regression
# fit() returns the fitted estimator, so construction and training are chained.
lin_reg = LinearRegression().fit(X, y)

# Step 3: Polynomial Regression (degree 2)
# Expand Level into its degree-2 polynomial terms, then fit an ordinary
# linear model on the expanded features.
poly = PolynomialFeatures(degree=2)
poly.fit(X)
X_poly = poly.transform(X)

lin_reg_poly = LinearRegression().fit(X_poly, y)

# Step 4: Visualize Simple Linear Regression
# Red points are the observed (level, salary) pairs; the blue line is the
# straight-line fit evaluated at those same levels.
plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Simple Linear Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

# Step 5: Visualize Polynomial Regression
# Evaluate the curve on a fine grid (points roughly 0.1 apart) so it is drawn
# smoothly between the observed levels. np.linspace is used rather than
# np.arange with a fractional step: arange's length depends on floating-point
# rounding, so it can drop or overshoot the end point, whereas linspace always
# starts and ends exactly on the lowest and highest level.
level_lo = X['Level'].min()
level_hi = X['Level'].max()
grid_points = int(round((level_hi - level_lo) / 0.1)) + 1
X_grid = np.linspace(level_lo, level_hi, grid_points).reshape(-1, 1)
# Wrap the grid in a DataFrame with the training column name so that the
# transformer receives input shaped and labelled like the data it was fitted on.
X_grid_df = pd.DataFrame(X_grid, columns=['Level'])

plt.scatter(X, y, color='red')
plt.plot(X_grid, lin_reg_poly.predict(poly.transform(X_grid_df)), color='green')
plt.title('Polynomial Regression (Degree 2)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

# Step 6: Accuracy Comparison
# Both scores are computed on the same rows the models were fitted to, so they
# measure goodness of fit on the given data, not performance on unseen data.
y_pred_linear = lin_reg.predict(X)
y_pred_poly = lin_reg_poly.predict(X_poly)

r2_linear = r2_score(y, y_pred_linear)
r2_poly = r2_score(y, y_pred_poly)

print(f"R² Score (Linear Regression): {r2_linear:.4f}")
print(f"R² Score (Polynomial Regression): {r2_poly:.4f}")

# The exercise asks which model fits the data more accurately, so state the
# verdict explicitly instead of leaving the reader to compare the two numbers.
if r2_poly > r2_linear:
    print("Polynomial Regression fits the given data more accurately.")
elif r2_poly < r2_linear:
    print("Linear Regression fits the given data more accurately.")
else:
    print("Both models fit the given data equally well.")

# Step 7: Predictions for Level 11 and 12
# Each level is wrapped in a one-row DataFrame carrying the training column
# name, so the models are queried with the same layout they were fitted on.
frame_11, frame_12 = (
    pd.DataFrame([[lvl]], columns=['Level']) for lvl in (11, 12)
)

# Straight-line model: predict directly from the level.
straight_11, straight_12 = (
    lin_reg.predict(frame) for frame in (frame_11, frame_12)
)

# Polynomial model: expand the level into polynomial terms before predicting.
curved_11, curved_12 = (
    lin_reg_poly.predict(poly.transform(frame)) for frame in (frame_11, frame_12)
)

# predict() returns a one-element array per query; [0] extracts the salary.
print("\nPredicted Salaries:")
print(f"Linear - Level 11: ₹{straight_11[0]:,.2f}")
print(f"Linear - Level 12: ₹{straight_12[0]:,.2f}")
print(f"Polynomial - Level 11: ₹{curved_11[0]:,.2f}")
print(f"Polynomial - Level 12: ₹{curved_12[0]:,.2f}")


Q.2 Write a Python program to find all null values in a dataset and remove them.

# Import the pandas library
import pandas as pd

# Step 1: Assemble a small demo table in which every column has one missing entry
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', None],
    'Age': [25, None, 30, 22, 28],
    'City': ['New York', 'Los Angeles', None, 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)
print("Original Dataset:\n", df, "\n")

# Step 2: Count the missing entries column by column
null_counts = df.isna().sum()
print("Null Values in Each Column:\n", null_counts, "\n")

# Step 3: Keep only the rows that are complete; any row with at least one
# missing value is discarded (df itself is left untouched)
df_cleaned = df.dropna(axis=0, how='any')

# Step 4: Show what remains after the incomplete rows are gone
print("Dataset After Removing Null Values:\n", df_cleaned)
